	
********************************************************************************
** 	TITLE: b8_0_sag_estimate_sample.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE:  This File combines Saginaw Court History Records (ROAs) with 
**	Saginaw Booking records 
** 
**	Note: Akin to B5, We perform key drops from sample at end of generating sag_appended. 
********************************************************************************
		* Fix the sort seed so that sorts with tied keys are reproducible across runs.
		set sortseed 13

	* Stack the two parsed Saginaw ROA extracts (postcrim first, then precrim).
	use "$output_data/saginaw_roa_postcrim_parsed.dta", clear
	append using "$output_data/saginaw_roa_precrim_parsed.dta"

	* NOTE(review): this upgrades gtools on every run; it needs network access.
	gtools, upgrade

	* Flag rows that have a ROA case id but no case status and remove them,
	* then append the separately parsed repulled file.
	* NOTE(review): presumably the repulled file re-supplies the dropped cases - confirm.
	generate repull = !(missing(roa_case_id) | !missing(case_status))
	drop if repull == 1

	append using "$output_data/sag_roa_crim_repulled_parsed.dta"

	* Expanded list of every variable currently in memory.
	unab vli_all : *

* Rescheduling flag: any_resch = 1 when any e_event* field mentions a reschedule or adjournment.
	capture drop any_resch
	unab vli_event : e_event*
	generate any_resch = 0
	foreach evt of local vli_event {
		local txt lower(`evt')
		* The bare adj abbreviation counts, except inside text such as ADJUDICATION - BY PLEA.
		replace any_resch = 1 if strpos(`txt',"resch") | strpos(`txt',"adjourn") | ///
			(strpos(`txt',"adj") & !strpos(`txt',"adjud"))
	}

// Event dates and corrections, given formatting.
// For each of the 533 event-comment slots, schedule<j> holds the originally scheduled
// date read from a fixed column of the comment text. Each notice layout needs its own
// column offset, so the layouts are tried in turn and later rules only fill slots that
// are still missing.
forvalues j=1/533{

	// Lower-cased comment text, reused by every rule below.
	local txt lower(e_event_comment`j')

	// Shared test that the comment contains something shaped like a month followed by a slash.
	// Single-digit months are matched with a leading blank, 10-12 without one.
	local has_month 0
	foreach tok in " 1/" " 2/" " 3/" " 4/" " 5/" " 6/" " 7/" " 8/" " 9/" "10/" "11/" "12/" {
		local has_month `has_month' | strpos(`txt',"`tok'")
	}

	// Rule 1: scheduling notices containing SCHD FOR, date starts at column 35.
	gen schedule`j' = date(substr(e_event_comment`j',35,10),"MDY",2050) ///
		if strpos(`txt',"schd for ") & (`has_month')

	// Rule 2: SCHEDULED without FOR, date starts at column 36. Example:
	// APR SENTENCING           SCHEDULED  7/19/2016 AT  1:31 PM Judge: D3
	replace schedule`j' = date(substr(e_event_comment`j',36,10),"MDY",2050) ///
		if missing(schedule`j') & strpos(`txt'," scheduled ") & ///
		!strpos(`txt'," scheduled for") & (`has_month')

	// Rule 3: SCHEDULED FOR with a four-digit year, date starts at column 35. Example:
	// APR JURY TRIAL      SCHEDULED FOR 10/14/1999 AT  9:00 AM Judge: JP
	replace schedule`j' = date(substr(e_event_comment`j',35,10),"MDY",2050) ///
		if missing(schedule`j') & strpos(`txt'," scheduled for") & (`has_month')

	// Rule 4: SCHEDULED FOR with a two-digit year, read as 19YY. Example:
	// APR TRIAL           SCHEDULED FOR  4/08/99 AT  8:30 AM Judge: C2
	// FIX: the condition used to compare schedule<j> with the 0/1 month test. Because
	// schedule<j> is required to be missing here, that comparison was always false and
	// this fallback never filled anything. The month test is now a plain condition,
	// exactly as in rules 1-3.
	replace schedule`j' = date(substr(e_event_comment`j',35,8) ,"MD19Y",2050) ///
		if missing(schedule`j') & strpos(`txt'," scheduled for") & (`has_month')

	cap format schedule`j' %td
}	

// Day of week of the first scheduled event.
// date_scheduled is the first non-missing schedule<j> in slot order; dow_scheduled is its
// day of week (Stata dow(): 0 = Sunday, ..., 6 = Saturday).
 gen dow_scheduled = dow(schedule1)
 gen date_scheduled = schedule1
 forvalues k=2/533{
	 replace date_scheduled = schedule`k' if mi(date_scheduled)
 }
 replace dow_scheduled = dow(date_scheduled) if mi(dow_scheduled)

// Remove records whose first scheduled date falls on a Sunday or Saturday.
 drop if inlist(dow_scheduled,0,6)


* crime types
*   For each of the 21 charge slots, the lower-cased charge text is keyword-matched into
*   0/1 category flags D_<crime>_<j>. The flags are then collapsed across slots into a
*   per-row count num_<crime> and an any-indicator D_<crime>.
	gen num_charges=0
	
	* All-zero placeholders. They are matched by the D_<crime>* wildcards in the egen loop
	* at the end of this section but, being zero, never change a row total or row maximum.
	gen D_drug_fel=0
	gen D_violent_fel=0
	gen D_property_fel=0
	gen D_dui_fel=0
	gen D_no_license_fel = 0
	gen D_weapon_fel = 0
	
	gen D_drug_mis=0
	gen D_violent_mis=0
	gen D_property_mis=0
	gen D_dui_mis=0
	gen D_no_license_mis = 0
	gen D_weapon_mis = 0
	
	forvalues j=1/21{
		* Lower-cased text of charge slot j, reused by every keyword test below.
		local chg lower(c_current_charge`j')

		replace num_charges = num_charges+1 if !mi(c_current_charge`j')	

		* Crimes against the person.
		gen D_person_`j' = strpos(`chg',"assault")|strpos(`chg',"homicide")| ///
			strpos(`chg',"sex")|strpos(`chg',"robbery")| ///
			strpos(`chg',"stalk")|strpos(`chg',"kidnap")| ///
			strpos(`chg',"imprison")| ///
			(strpos(`chg',"domestic")&strpos(`chg',"violence"))

		* Property crimes.
		gen D_property_`j' = strpos(`chg',"arson")|strpos(`chg',"break")| ///
			strpos(`chg',"burglary")|strpos(`chg',"counterfeit")| ///
			strpos(`chg',"forgery")|strpos(`chg',"destruct")| ///
			strpos(`chg',"embezzlement")|strpos(`chg',"larceny")| ///
			strpos(`chg',"theft")|strpos(`chg',"fraud")| ///
			strpos(`chg',"home invasion")|strpos(`chg',"stolen")| ///
			strpos(`chg',"trespass")|strpos(`chg',"retail")| ///
			strpos(`chg',"carjack")|strpos(`chg',"uttering")

		* D_po keywords: animal, support, disorderly, flee, resist. Only when not already a person crime.
		gen D_po_`j' = ( strpos(`chg',"animal")|strpos(`chg',"support")| ///
			strpos(`chg',"disorderly")|strpos(`chg',"flee")| ///
			strpos(`chg',"resist") ///
			) ///
			& D_person_`j'==0

		* DUI. FIX: the alcohol rule only tested the misspelling alchohol, so charges that
		* spell the word correctly could not match it. Both spellings are accepted now.
		* NOTE(review): confirm against the raw charge text which spelling actually occurs.
		gen D_dui_`j' = (strpos(`chg',"operating")&strpos(`chg',"intoxicated"))| ///
			(strpos(`chg',"operat")&(strpos(`chg',"alcohol")|strpos(`chg',"alchohol")))| ///
			(strpos(`chg',"operat")&strpos(`chg',"sub"))

		* Drug crimes, only when the charge is not a DUI.
		gen D_drug_`j' = ( strpos(`chg',"drug")|strpos(`chg',"substance")| ///
			(strpos(`chg',"sub")&strpos(`chg',"manufacture"))| ///
			strpos(`chg',"narcotic")|strpos(`chg',"marijuana")| ///
			(strpos(`chg',"cont")&strpos(`chg',"sub")) ///
			) & ///
			D_dui_`j'==0

		* Weapons, only when none of the categories above applies.
		gen D_weapons_`j' = (strpos(`chg',"weapon")|strpos(`chg',"firearm")) & ///
			(D_person_`j'+D_property_`j'+D_drug_`j'+D_po_`j'+D_dui_`j'==0)

		* Other traffic, only when the charge is neither DUI nor property.
		gen D_traffic_other_`j' = ( strpos(`chg',"driv")| ///
			strpos(`chg',"drove")| ///
			strpos(`chg',"operat")| ///
			strpos(`chg',"license")|strpos(`chg',"plates") ///
			) & ///
			D_dui_`j'==0 & D_property_`j'==0

		* Residual category. FIX: an empty charge slot matches no keyword, so it used to be
		* flagged as an other crime. That made num_crime_other count the unused slots and
		* set D_crime_other to 1 for every row with fewer than 21 charges. The residual flag
		* now requires the slot to actually hold a charge.
		gen D_crime_other_`j' = ///
			(D_person_`j'+D_property_`j'+D_drug_`j'+D_po_`j'+D_dui_`j'+D_weapons_`j'+D_traffic_other_`j'==0) ///
			& !mi(c_current_charge`j')
	}
	
	* Collapse the slot-level flags: num_<crime> counts flagged slots, D_<crime> is 1 if any slot is flagged.
	foreach crime in person property po dui drug weapons traffic_other crime_other{
		egen num_`crime' = rowtotal(D_`crime'*)
		egen D_`crime' = rowmax(D_`crime'*)
		
	}
	

// Rebuild the public defender flag from the attorney type text:
// 1 when the lower-cased attorney_type contains public or appointed, else 0.
capture drop D_public_defender
		local atty lower(attorney_type)
		generate D_public_defender = strpos(`atty',"public") | strpos(`atty',"appointed")

	// Keep only the case identifiers, case descriptors and the variables derived above.
	local id_vars     case_id roa_case_id judge_name date_filed case_type
	local party_vars  case_status entitlement defendent attorney_name attorney_type
	local derived     any_resch D_public_defender num_charges
	local crime_vars  num_person D_person num_property D_property num_po D_po ///
		num_dui D_dui num_drug D_drug num_weapons D_weapons ///
		num_traffic_other D_traffic_other num_crime_other D_crime_other
	local sched_vars  dow_scheduled date_scheduled
	keep `id_vars' `party_vars' `derived' `crime_vars' `sched_vars'

	// The case identifier is renamed to case_nr, the key used in the booking merge.
	rename case_id case_nr

* Merge with booking records.
*   booking_records      : the booking file as loaded (one row per booking_nr)
*   booking_records_temp : booking_nr x case_nr pairs in long form, empty case numbers removed
	tempfile booking_records booking_records_temp

	preserve
		// Booking file; per the original note it carries the demographic info, return_3m, etc.
		use "$output_data/bookingrecords_saginaw_work.dta", clear
		save `booking_records'

		// Reshape the case_nr* columns to one row per booking and case number.
		keep booking_nr case_nr*
		greshape long case_nr, i(booking_nr) j(temp_id)
		keep if !mi(case_nr)
		save `booking_records_temp'
	restore

	// Attach each court case to its booking(s); court cases with no booking are discarded,
	// booking rows with no matching court case are retained.
	merge 1:m case_nr using `booking_records_temp', nogenerate keep(using match)

* Dates: case filed. Convert the string date_filed to a Stata daily date under the same name.
		generate date_filed_new = date(date_filed,"MDY")
		format date_filed_new %td
		drop date_filed
		rename date_filed_new date_filed

// Within each booking, number the cases in order of filing date.
	bysort booking_nr (date_filed): generate id = _n
	order booking_nr date_filed, first

// temp_id is the case counter from the booking records; it is not ordered by filing date, id is.
	drop temp_id

// One row per booking: every case-level variable gets the suffix id.
	local case_level case_nr roa_case_id judge_name date_filed case_type ///
		case_status entitlement defendent attorney_name attorney_type ///
		any_resch D_public_defender num_charges ///
		num_person D_person num_property D_property num_po D_po ///
		num_dui D_dui num_drug D_drug num_weapons D_weapons ///
		num_traffic_other D_traffic_other num_crime_other D_crime_other ///
		dow_scheduled date_scheduled
	greshape wide `case_level', i(booking_nr) j(id)
	
	// Bring the booking-level variables back; bookings without any court case are retained.
	merge 1:1 booking_nr using `booking_records', nogenerate keep(using match)

// Case types: the last two characters of each case number are compared with the type codes
// below; each code becomes a 0/1 variable set to 1 if any of the 11 case slots carries it.
		local case_types EX FD FT FY OD OI OM OT SD SM ST
		foreach code of local case_types {
			generate `code' = 0
		}
		forvalues j=1/11{
			generate type`j' = substr(case_nr`j',-2,.)
			foreach code of local case_types {
				replace `code' = 1 if type`j'=="`code'"
			}
		}

		// D_felony is 1 if any of the codes EX, FD, FT, FY is present.
		generate D_felony = (EX==1 | FD==1 | FT==1 | FY==1)

		// Paired O*/S* codes, combined into the mis_* indicators.
		egen mis_dui = rowmax(OD SD)
		egen mis_traffic = rowmax(OT ST)
		egen mis_crime = rowmax(OM SM)

	// Public defender flag at the booking level: taken from case 1 only, rather than
	// from any of the cases attached to the booking.
		gen D_public_defender= (D_public_defender1 == 1)

// Day of week of the earliest scheduled date across the cases of the booking.
	 egen min_scheduled = rowmin(date_scheduled*)
	 gen dow_scheduled=dow(min_scheduled)

	 * 99 marks bookings with no scheduled date at all.
		replace dow_scheduled = 99 if missing(dow_scheduled)
	
* Number of delays: how many of the booking's cases have any rescheduling.
* FIX: this count is now computed BEFORE the booking-level any_resch is created. The
* any_resch* wildcard also matches a variable named any_resch, so with the old order the
* row total included the newly created maximum and was one too high whenever a delay existed.
	egen num_delays  = rowtotal(any_resch*)

* Booking-level indicator: 1 if any case of the booking was rescheduled.
	egen any_resch = rowmax(any_resch*)

	* Keep the original column order (any_resch ahead of num_delays).
	order any_resch, before(num_delays)
	
	* Reports how many bookings have at least one court case attached.
	count if !mi(case_nr1)
	
/*
	//Number of Charges, summing up all the charges for all cases of a booking number. 
*/	
	// Charge-count category, top-coded at 6.
	// NOTE(review): n_charge is not created in this file; presumably it comes from the
	// booking records - confirm.
	// FIX: Stata treats missing as larger than any number, so the old top-coding rule
	// also turned a missing n_charge into category 6. Missing now stays missing.
	cap drop cat_num_charge
	gen cat_num_charge = n_charge
	replace cat_num_charge=6 if n_charge>=6 & !missing(n_charge)
	
	
gen month_booked = month(booking_date)

// Inmate identifier: the prefix s- followed by ind_id.
// FIX: string() without a format uses a general numeric format, which can round or
// switch to scientific notation for long IDs; an explicit integer format keeps every digit.
// NOTE(review): assumes ind_id is an integer-valued numeric variable - confirm.
gen inmate = "s-" + string(ind_id, "%15.0f")


//Renaming variables to be consistent. 
rename black D_Black 
rename female D_female

// ignite_0_rec: length of the stay, in 30-day units, that falls on or after 09/08/2020.
//   booked on/after that date : (release_date - booking_date)/30
//   booked before that date   : (release_date - 09/08/2020)/30, floored at 0
// It stays missing when release_date or booking_date is missing.
		local ignite_start = date("09/08/2020","MDY")
		gen ignite_0_rec = .
		replace ignite_0_rec = (release_date - booking_date)/30 if booking_date >= `ignite_start'
		replace ignite_0_rec = (release_date - `ignite_start')/30 if booking_date < `ignite_start'
		replace ignite_0_rec = 0 if ignite_0_rec<0	

//Genesee County Dummy (always 0 in this Saginaw file)
	gen genesee = 0
	
//Design and auxiliary controls needed. All missing, due to lack of address information	
	gen cat_division = 999 //missing
	
	// Placeholder value -1 for the address-based controls, with D_missing = 1 marking them as missing.
	foreach v in pctebll pc_hs log_med_hhincome pc_black{
	gen `v' = -1
	}
	gen D_missing = 1
	
	// Write the full appended Saginaw file before any sample restriction.
	save "$output_data/sag_appended.dta", replace

// From here on only the estimation sample is kept (rows where e_sample is non-zero);
// sag_appended.dta above is unaffected.
drop if !e_sample

// Saginaw estimation sample with the within-county recidivism measure.
// Per the original notes this is a temporary file: it identifies the relevant
// observations in the study, and recidivism is updated later with Genesee County.
	save "$output_data/sag_post_roa_all_temp.dta", replace
	